Install required libraries
# install.packages(c("igraph", "ggmap", "popgraph", "plyr", "ppls"))
library(knitr)
Let’s load libraries, stop and line data.
stops = read.csv("../crawler/output/stop.csv")
stops = unique(stops)
lines = read.csv("../crawler/output/linedetail.csv")
Now create edges by connecting adjacent stops in each line:
newedges = list()
k = 1
for (i in 1:nrow(lines)) {
line_name = as.character(lines$cdk_id[i])
line_stops = strsplit(as.character(lines$stop_list[i]), ";")[[1]]
for (j in 1:(length(line_stops)-1)){
newedges[[k]] = c(line_stops[j], line_stops[j+1], line_name)
k = k + 1
}
}
Then we can convert this edge list into a matrix with columns (V1, V2, Line):
matrix_edges = matrix(data=NA, nrow=length(newedges), ncol=3)
for (i in 1:length(newedges)){
matrix_edges[i, 1] = newedges[[i]][1]
matrix_edges[i, 2] = newedges[[i]][2]
matrix_edges[i, 3] = newedges[[i]][3]
}
Now we convert this matrix into a data frame with columns V1 (From), V2 (To) and Line (Edge label).
frame_edges = as.data.frame(matrix_edges, stringsAsFactors = TRUE, row.names=c("from", "to", "line"))
colnames(frame_edges) = c("V1","V2","Line")
Now that we have edges and nodes, we can construct a graph object.
library("igraph")
##
## Attaching package: 'igraph'
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
g = graph.data.frame(frame_edges, TRUE, stops$cdk_id)
V(g)$Latitude = stops$lat
V(g)$Longitude = stops$lon
V(g)$size = 0.5
V(g)$label = as.character(stops$name)
V(g)$labels = as.character(stops$name)
E(g)$label = as.character(frame_edges$Line)
Now create an aerial map:
library("ggmap")
## Loading required package: ggplot2
location = c(mean(stops$lon), mean(stops$lat))
map = get_map(location, maptype="watercolor", source="osm", zoom=10)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=41.034869,28.991441&zoom=10&size=640x640&maptype=terrain&sensor=false
Now draw the spatial map and plot nodes/edges on it:
library(popgraph)
p = ggmap(map)
p = p + geom_nodeset( aes(x=Longitude, y=Latitude, size=size, label=label), g)
p = p + geom_edgeset( aes(x=Longitude, y=Latitude, shape=label, label=label), g, color="yellow", alpha=0.3)
p + xlab("Longitude") + ylab("Latitude")
## Warning: Removed 424 rows containing missing values (geom_point).
## Warning: Removed 1453 rows containing missing values (geom_segment).
# ggsave ("map.png", dpi = 200)
We construct the graph and add an additional weight parameter to edges. Edges will be 0-1 normalized log of the frequencies (number of buses pass through that edge).
library(plyr)
##
## Attaching package: 'plyr'
##
## The following object is masked from 'package:maps':
##
## ozone
library(ppls)
## Loading required package: splines
## Loading required package: MASS
range01 <- function(x){(x-min(x))/(max(x)-min(x))}
freqs = count(frame_edges, vars=c("V1","V2"))
g2 = graph.data.frame(freqs, TRUE, stops$cdk_id)
V(g2)$Latitude = stops$lat
V(g2)$Longitude = stops$lon
V(g2)$size = 0.5
V(g2)$label = as.character(stops$name)
V(g2)$labels = as.character(stops$name)
E(g2)$freq = as.numeric(freqs$freq)
E(g2)$weight = as.numeric(range01(log(as.numeric(freqs$freq))))
E(g2)$betweenness = edge.betweenness.estimate(g2, directed=TRUE, cutoff=10, weights = E(g2)$freq)
E(g2)$logbetweenness = log(E(g2)$betweenness)
Now let’s see the degree distribution of the weighted network:
plot(degree.distribution(g2), breaks=100, xlab="Derece", ylab="Bulunma Orani", main="Derece Dagilimi")
## Warning in plot.window(...): "breaks" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "breaks" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "breaks" is
## not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "breaks" is
## not a graphical parameter
## Warning in box(...): "breaks" is not a graphical parameter
## Warning in title(...): "breaks" is not a graphical parameter
table(degree(g2))
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 16 20
## 62 6678 1319 685 213 95 51 18 13 5 6 1 3 1 2
## 21 30 34
## 1 1 1
max_degree_node = which.max(degree.distribution(g2))
Now we draw the map with the edge thickness and color adjusted to edge weight (bus frequency).
location = c(mean(stops$lon), mean(stops$lat))
map11 = get_map(location, maptype="watercolor", source="osm", zoom=11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=41.034869,28.991441&zoom=11&size=640x640&maptype=terrain&sensor=false
p2 = ggmap(map11)
p2 = p2 + geom_nodeset( aes(x=Longitude, y=Latitude, size=size, label=label), g2)
p2 = p2 + geom_edgeset( aes(x=Longitude, y=Latitude, shape=label, label=label, size=weight, color=weight), g2, alpha=1.0) + scale_colour_continuous(low="#f98e9e", high="#9a0017")
p2 + xlab("Longitude") + ylab("Latitude")
## Warning: Removed 3107 rows containing missing values (geom_point).
## Warning: Removed 3728 rows containing missing values (geom_segment).
#ggsave ("map2.png", dpi = 200)
Lets see the histogram of the edge betweenness values that are greater than 1:
hist(E(g2)$logbetweenness)
Wee see that busiest edges are ones above logbetweenness 3. So remove less busy ones.
small_edges = subset(E(g2), E(g2)$logbetweenness < 3)
g3 = delete.edges(g2, small_edges)
Now plot the map according to logarithmic edge betweenness:
E(g3)$size=0.1
location = c(mean(stops$lon), mean(stops$lat))
map12 = get_map(location, maptype="watercolor", source="osm", zoom=12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=41.034869,28.991441&zoom=12&size=640x640&maptype=terrain&sensor=false
p3 = ggmap(map12)
#p3 = p3 + geom_nodeset( aes(x=Longitude, y=Latitude, size=size, label=label), g3)
p3 = p3 + geom_edgeset( aes(x=Longitude, y=Latitude, shape=label, label=label, size=size, color=logbetweenness), g3, alpha=1.0) + scale_colour_continuous(low="#f98e9e", high="#9a0017")
p3 + xlab("Longitude") + ylab("Latitude")
## Warning: Removed 573 rows containing missing values (geom_segment).
#ggsave ("map3.png", dpi = 200)
We can see that two bridges of Istanbul have the highest betweenness values. This means that they have very important roles in Istanbul transportation and a failure in the service would cause lots of delays. Next, we can see roads Besiktas-Levent, Sisli-Kagithane, around Sisli, Halic Bridge, Yenisahra E-5. Then we can see the highway (on the rightmost vertical) connecting E-5 to north bridge.
Calculate node betweenness and sort nodes by that:
V(g2)$betweenness = betweenness(g2, v=V(g2), directed = TRUE, weights = E(g2)$freq)
V(g2)$logbetweenness = log(V(g2)$betweenness)
dd = get.data.frame(g2, what=c("vertices"))
sorted_list = dd[order(dd$betweenness, decreasing = TRUE),]
top30 = sorted_list[1:30,c("label","logbetweenness")]
#for (i in 1:20 ) {
# print(sorted_list$label[i])
#}
print(kable(top30, format="markdown", digits=3, caption = "Top 30 nodes for Node Betweenness score"))
| label | logbetweenness | |
|---|---|---|
| gtfs.stop.istb.184901 | SABHA GKEN .D.H. | 17.156 |
| gtfs.stop.istb.192621 | KAVACIK KPRS | 17.114 |
| gtfs.stop.istb.184157 | TAKSM | 17.069 |
| gtfs.stop.istb.184900 | SABHA GKEN NZ. | 16.664 |
| gtfs.stop.istb.189156 | EMNN SKELE | 16.580 |
| gtfs.stop.istb.189160 | YENKAPI SAHL | 16.579 |
| gtfs.stop.istb.189003 | KUMKAPI | 16.579 |
| gtfs.stop.istb.189001 | ATLADIKAPI | 16.579 |
| gtfs.stop.istb.188999 | AKBIYIK | 16.579 |
| gtfs.stop.istb.188995 | SARAYBURNU | 16.579 |
| gtfs.stop.istb.184176 | SALIPAZARI | 16.561 |
| gtfs.stop.istb.184168 | TEKNK NVERSTE | 16.538 |
| gtfs.stop.istb.188372 | MEVLANAKAPI | 16.531 |
| gtfs.stop.istb.188376 | SLVRKAPI | 16.531 |
| gtfs.stop.istb.183387 | DOLMABAHE SARAYI | 16.526 |
| gtfs.stop.istb.184230 | MNATRK | 16.505 |
| gtfs.stop.istb.184225 | HAL KONGRE MERKEZ | 16.505 |
| gtfs.stop.istb.192622 | KAVACIK KPRS | 16.490 |
| gtfs.stop.istb.183444 | 4.LEVEND | 16.446 |
| gtfs.stop.istb.189209 | TOPKAPI ALT GET | 16.443 |
| gtfs.stop.istb.184410 | NN STADI | 16.431 |
| gtfs.stop.istb.183452 | FABRKALAR | 16.338 |
| gtfs.stop.istb.183454 | LEVENT | 16.334 |
| gtfs.stop.istb.183854 | Z.KUYU-METROBS 2 | 16.327 |
| gtfs.stop.istb.184169 | GMSUYU | 16.288 |
| gtfs.stop.istb.184223 | STLCE | 16.247 |
| gtfs.stop.istb.184219 | HALICIOLU 9 | 16.243 |
| gtfs.stop.istb.184216 | HALICIOLU 7 | 16.243 |
| gtfs.stop.istb.188266 | AYVANSARAY | 16.243 |
| gtfs.stop.istb.184115 | MECDYEKY-VYADK | 16.129 |
Now plot the map according to node betweenness:
location = c(mean(stops$lon), mean(stops$lat))
map10 = get_map(location, maptype="watercolor", source="osm", zoom=10)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=41.034869,28.991441&zoom=10&size=640x640&maptype=terrain&sensor=false
p4 = ggmap(map10)
p4 = p4 + geom_nodeset( aes(x=Longitude, y=Latitude, size=betweenness, label=label), g2, alpha=0.7)
p4 + xlab("Longitude") + ylab("Latitude")
## Warning: Removed 424 rows containing missing values (geom_point).
#ggsave ("map4.png", dpi = 200)
This time adjust alpha with 0-1 normalized node betweenness to see major ones. See in a more detailed map:
location = c(mean(stops$lon), mean(stops$lat))
map12 = get_map(location, maptype="watercolor", source="osm", zoom=12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=41.034869,28.991441&zoom=12&size=640x640&maptype=terrain&sensor=false
p5 = ggmap(map12)
p5 = p5 + geom_nodeset( aes(x=Longitude, y=Latitude, size=betweenness, label=label), g2, alpha=range01(V(g2)$betweenness))
p5 + xlab("Longitude") + ylab("Latitude")
## Warning: Removed 6343 rows containing missing values (geom_point).
#ggsave ("map5.png", dpi = 200)